# insurance <- read.table("BBDD_DEFINITIVA_V4.txt", header = T)
# saveRDS(insurance, "BBDD_DEFINITIVA_V4.RDS")
insurance <- readRDS("BBDD_DEFINITIVA_V4.RDS")CLAVE: Identification of the person
PERIODO: Period (year)
COVER: Number of coverages (1, 2 or 3)
SEXO:
FUMADOR:
BUENA_SALUD: Yes or No
EDAD_ACTUARIAL: actuarial age in each year
EDAD_SUS: actuarial age in the moment of suscription
EXP: Tiempo de exposición de la póliza al riesgo de fallecimiento durante el año.
mortality: Yes or No
DURACION: Number of years in force of the policy
DURACION_CAT: Rounded duration
CAPITAL: Capital asegurado. Dinero que recibirá el beneficiario en caso de mortality
CAPITAL_CAT1:
CAPITAL_CAT2:
CAPITAL_CAT3:
CAPITAL_CAT1_DESC:
CAPITAL_CAT2_DESC:
CAPITAL_CAT3_DESC:
IMC:
IMC_CAT1:
IMC_CAT2:
IMC_CAT3:
IMC_CAT1_DESC:
IMC_CAT2_DESC:
insurance <- insurance %>%
rename(
key = CLAVE,
actuarial_age = EDAD_ACTUARIAL,
sus_age = EDAD_SUS,
duration_cat = DURACION_CAT,
capital = CAPITAL,
) %>%
mutate(
period = factor(insurance$PERIODO),
cover = factor(insurance$COVER),
sex = factor(ifelse(insurance$SEXO == 0, "man", "woman")),
smoker = factor(ifelse(insurance$FUMADOR == 0, "no", "yes")),
good_health = factor(ifelse(insurance$BUENA_SALUD == 1, "yes", "no")),
exp = as.numeric(gsub(",", ".", gsub("\\.", "", insurance$EXP))),
mortality = factor(ifelse(insurance$SINIESTRO == 1, "yes", "no")),
duration = as.numeric(gsub(",",".", gsub("\\.", "", insurance$DURACION))),
capital_factor_1 = factor(insurance$CAPITAL_CAT1_DESC,
levels = c("0-30.000", "30.001-60.000",
"60.001-90.000","90.001-120.000",
"120.001-150.000", "+150.000"),
ordered = T),
capital_factor_2 = factor(insurance$CAPITAL_CAT2_DESC,
levels = c("0-60.000", "60.001-120.000",
"+120.000"),
ordered = T),
capital_factor_3 = factor(insurance$CAPITAL_CAT3_DESC,
levels = c("0-100.000", "100.001-200.000",
"+200.000"),
ordered = T),
IMC = as.numeric(gsub(",", ".", gsub("\\.", "", insurance$IMC))),
IMC_factor_1 = factor(insurance$IMC_CAT1_DESC,
levels = c("Normal", "Riesgo"),
labels = c("normal", "risk")),
IMC_factor_2 = factor(insurance$IMC_CAT2_DESC,
levels = c("Normal", "Sobrepeso"),
labels = c("normal", "overweight"))
) %>%
select(
key, period, cover, sex, smoker, good_health, actuarial_age, sus_age, exp,
mortality, duration, duration_cat, capital, capital_factor_1, capital_factor_2,
capital_factor_3, IMC, IMC_factor_1, IMC_factor_2
)Here, we remove the repeated columns
insurance <- tibble::rowid_to_column(insurance, "ID")
# here we take the IDs that are not repeated
IDs <- insurance %>%
arrange(desc(mortality)) %>%
distinct_at(vars(key, period, cover), .keep_all = T) %>%
select(ID)
# here we ensure that the rows we are removing have mortality = 0
insurance %>%
filter(!(ID %in% IDs$ID)) %>%
select(mortality) %>%
summary()## mortality
## no :63
## yes: 0
insurance <- insurance %>%
filter(ID %in% IDs$ID)
insuranceAlso, we create another df of only the people that had some sinister
# mortalityS
key_mortality <- insurance %>% # tomamos todas las claves de las personas con mort
filter(mortality == "yes") %>%
select(key)
insurance_mortality <- insurance %>%
filter(key %in% key_mortality$key)
insurance_mortalityAnd another one with the people that drop
# lapse
# Here we take the keys of the lapse
keys_lapse <- insurance %>%
filter((key %in% key_mortality$key) == F) %>%
group_by(key) %>%
count() %>%
mutate(lapse = ifelse(n == 1 | n == 2, "yes", "no")) %>%
filter(lapse == "yes") %>%
select(key)
insurance_lapse <- insurance %>%
filter((key %in% key_mortality$key) == F) %>%
mutate(lapse = ifelse(key %in% keys_lapse$key, "yes", "no")) %>%
arrange(key)
for (i in 2:nrow(insurance_lapse)){
if (insurance_lapse$lapse[i-1] == "yes") {
if (insurance_lapse$key[i-1] == insurance_lapse$key[i]) {
insurance_lapse$lapse[i-1] = "no"
}
}
}
insurance_lapse$mortality = NULL
insurance_lapse# saveRDS(insurance, file = "insurance.RDS")
# saveRDS(insurance_mortality, file = "insurance_mortality.RDS")
# saveRDS(insurance_lapse, file = "insurance_lapse.RDS")
insurance <- readRDS("insurance.RDS")
insurance_mortality <- readRDS("insurance_mortality.RDS")
insurance_lapse <- readRDS("insurance_lapse.RDS")